In [1]:
import pandas as pd

from scrapenhl2.scrape import autoupdate, schedules, team_info, players
from scrapenhl2.manipulate import manipulate as manip

The purpose of this script is to get game-by-game 5v5 toi counts by player and team for every game since 2012-13. We can get this information from the 5v5 player log easily.


In [2]:
# Update data
# autoupdate.autoupdate()  # Comment in if needed, and loop if needed
# manip.get_5v5_player_log(2017, force_create)  # Comment in if needed, and loop if needed
log = pd.concat([manip.get_5v5_player_log(season).assign(Season=season) for season in range(2012, 2018)])
sch = pd.concat([schedules.get_season_schedule(season).assign(Season=season) for season in range(2012, 2018)])
log.head()


Out[2]:
PlayerID Game TOION TeamTOI TOIOFF CAON CFON TeamCA TeamCF CFOFF ... E-OtF N NDL NDR NOL NOR OL OR TeamID Season
0 8460542.0 20006 0.222500 3034.0 0.620278 6.0 17.0 31.0 45.0 28.0 ... 15.0 2.0 0.0 2.0 2.0 0.0 0.0 0.0 1 2012
1 8460542.0 20030 0.196389 2515.0 0.502222 12.0 9.0 32.0 24.0 15.0 ... 9.0 0.0 5.0 3.0 0.0 0.0 0.0 5.0 1 2012
2 8460542.0 20052 0.176111 2416.0 0.495000 5.0 10.0 27.0 29.0 19.0 ... 10.0 5.0 0.0 2.0 0.0 0.0 0.0 2.0 1 2012
3 8460542.0 20067 0.219444 2936.0 0.596111 4.0 14.0 45.0 29.0 15.0 ... 11.0 7.0 2.0 0.0 0.0 0.0 2.0 4.0 1 2012
4 8460542.0 20067 0.219444 2936.0 0.596111 4.0 14.0 45.0 29.0 15.0 ... 11.0 7.0 2.0 0.0 0.0 0.0 2.0 4.0 1 2012

5 rows × 62 columns

All we need to do is:

  • Sum TOION and TOIOFF, and take distinct values to get team counts
  • Take TOION for individual counts

In [3]:
# Teams
teamtoi = log.assign(TOI=log.TOION + log.TOIOFF) \
    [['Season', 'Game', 'TOI']] \
    .groupby(['Season', 'Game'], as_index=False) \
    .max()  # take max to avoid floating point errors that may fell drop_duplicates
teamtoi = sch[['Season', 'Game', 'Home', 'Road']] \
    .melt(id_vars=['Season', 'Game'], var_name='HR', value_name='TeamID') \
    .merge(teamtoi, how='inner', on=['Season', 'Game']) \
    .drop_duplicates()
    
# Make names into str, and convert TOI from hours to minutes
teamtoi.loc[:, 'Team'] = teamtoi.TeamID.apply(lambda x: team_info.team_as_str(x))
teamtoi.loc[:, 'TOI(min)'] = teamtoi.TOI * 60
teamtoi = teamtoi.drop(['TeamID', 'TOI'], axis=1)
teamtoi.head()


Out[3]:
Season Game HR Team TOI(min)
0 2012 20001 Home PHI 46.816667
1 2012 20001 Road PIT 46.816667
2 2012 20002 Home WPG 46.016667
3 2012 20002 Road OTT 46.016667
4 2012 20003 Home LAK 43.583333

In [4]:
# Individuals
indivtoi = log[['Season', 'Game', 'PlayerID', 'TOION', 'TeamID']]

# IDs to names and TOI from hours to minutes
indivtoi.loc[:, 'Player'] = players.playerlst_as_str(indivtoi.PlayerID.values)
indivtoi.loc[:, 'Team'] = indivtoi.TeamID.apply(lambda x: team_info.team_as_str(x))
indivtoi.loc[:, 'TOI(min)'] = indivtoi.TOION * 60

indivtoi = indivtoi.drop(['TeamID', 'TOION', 'PlayerID'], axis=1)
indivtoi.head()


/Users/muneebalam/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py:337: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
/Users/muneebalam/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py:517: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
Out[4]:
Season Game Player Team TOI(min)
0 2012 20006 Patrik Elias NJD 13.350000
1 2012 20030 Patrik Elias NJD 11.783333
2 2012 20052 Patrik Elias NJD 10.566667
3 2012 20067 Patrik Elias NJD 13.166667
4 2012 20067 Patrik Elias NJD 13.166667

In [5]:
# Write to file
teamtoi.to_csv('/Users/muneebalam/Desktop/teamtoi.csv')
indivtoi.to_csv('/Users/muneebalam/Desktop/indivtoi.csv')